etree.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. from __future__ import absolute_import, division, unicode_literals
  2. # pylint:disable=protected-access
  3. from pip._vendor.six import text_type
  4. import re
  5. from copy import copy
  6. from . import base
  7. from .. import _ihatexml
  8. from .. import constants
  9. from ..constants import namespaces
  10. from .._utils import moduleFactoryFactory
  # Splits an ElementTree-style "{namespace}localname" string into its
  # namespace (group 1) and local name (group 2).  Strings that do not start
  # with "{" do not match at all.
  tag_regexp = re.compile(r"{([^}]*)}(.*)")
  12. def getETreeBuilder(ElementTreeImplementation, fullTree=False):
  13. ElementTree = ElementTreeImplementation
  14. ElementTreeCommentType = ElementTree.Comment("asd").tag
  class Element(base.Node):
      """Tree-builder node backed by an ElementTree element.

      Children are tracked twice, in the same order: as ElementTree elements
      inside ``self._element`` and as wrapper nodes in ``self._childNodes``.
      Every mutating method below keeps both views in step.
      """

      def __init__(self, name, namespace=None):
          """Create the wrapper and the ElementTree element it manages.

          ``name`` is the local name and ``namespace`` the namespace URI (or
          None).  ``nameTuple`` falls back to the HTML namespace when no
          namespace is given.
          """
          self._name = name
          self._namespace = namespace
          self._element = ElementTree.Element(
              self._getETreeTag(name, namespace))
          if namespace is None:
              self.nameTuple = namespaces["html"], self._name
          else:
              self.nameTuple = self._namespace, self._name
          self.parent = None
          self._childNodes = []
          self._flags = []

      def _getETreeTag(self, name, namespace):
          """Return the ElementTree tag: ``name`` or ``{namespace}name``."""
          if namespace is None:
              return name
          return "{%s}%s" % (namespace, name)

      def _setName(self, name):
          self._name = name
          self._element.tag = self._getETreeTag(self._name, self._namespace)

      def _getName(self):
          return self._name

      # Assigning a new name also rewrites the wrapped element's tag.
      name = property(_getName, _setName)

      def _setNamespace(self, namespace):
          self._namespace = namespace
          self._element.tag = self._getETreeTag(self._name, self._namespace)

      def _getNamespace(self):
          return self._namespace

      # Assigning a new namespace also rewrites the wrapped element's tag.
      namespace = property(_getNamespace, _setNamespace)

      def _getAttributes(self):
          return self._element.attrib

      def _setAttributes(self, attributes):
          """Replace all attributes of the wrapped element.

          Tuple keys are stored as ``{key[2]}key[1]`` (presumably namespace
          and local name -- confirm against the caller); any other key is
          used unchanged.
          """
          el_attrib = self._element.attrib
          el_attrib.clear()
          if attributes:
              # calling .items _always_ allocates, and the above truthy check
              # is cheaper than the allocation on average
              for key, value in attributes.items():
                  if isinstance(key, tuple):
                      name = "{%s}%s" % (key[2], key[1])
                  else:
                      name = key
                  el_attrib[name] = value

      attributes = property(_getAttributes, _setAttributes)

      def _getChildNodes(self):
          return self._childNodes

      def _setChildNodes(self, value):
          """Drop every current child, then append each node in ``value``."""
          del self._element[:]
          self._childNodes = []
          for element in value:
              # appendChild updates the ElementTree element, the wrapper list
              # and the child's parent.  (The previous code called
              # ``insertChild``, which this class does not define.)
              self.appendChild(element)

      childNodes = property(_getChildNodes, _setChildNodes)

      def hasContent(self):
          """Return true if the node has children or text"""
          return bool(self._element.text or len(self._element))

      def appendChild(self, node):
          """Add ``node`` as the last child and make this node its parent."""
          self._childNodes.append(node)
          self._element.append(node._element)
          node.parent = self

      def insertBefore(self, node, refNode):
          """Insert ``node`` immediately before the existing child ``refNode``.

          Raises ValueError when ``refNode``'s element is not a child of the
          wrapped element.
          """
          index = list(self._element).index(refNode._element)
          self._element.insert(index, node._element)
          # Mirror the insertion in the wrapper list; otherwise a later
          # removeChild(node) fails and reparentChildren loses the node.
          try:
              wrapper_index = self._childNodes.index(refNode)
          except ValueError:
              # The two views were already out of step; fall back to the
              # ElementTree position (list.insert clamps large indexes).
              wrapper_index = index
          self._childNodes.insert(wrapper_index, node)
          node.parent = self

      def removeChild(self, node):
          """Detach ``node`` from this element and clear its parent."""
          self._childNodes.remove(node)
          self._element.remove(node._element)
          node.parent = None

      def insertText(self, data, insertBefore=None):
          """Add the text ``data`` to this element.

          ElementTree has no text nodes, so the text is appended to this
          element's ``text`` or to the ``tail`` of the appropriate child.
          """
          element = self._element
          if not len(element):
              # No child elements: the text belongs to the element itself.
              element.text = (element.text or "") + data
          elif insertBefore is None:
              # Insert the text as the tail of the last child element
              last = element[-1]
              last.tail = (last.tail or "") + data
          else:
              # Insert the text before the specified node
              index = list(element).index(insertBefore._element)
              if index > 0:
                  previous = element[index - 1]
                  previous.tail = (previous.tail or "") + data
              else:
                  element.text = (element.text or "") + data

      def cloneNode(self):
          """Return a childless copy with the same name, namespace and a
          shallow copy of the attributes."""
          element = type(self)(self.name, self.namespace)
          if self._element.attrib:
              element._element.attrib = copy(self._element.attrib)
          return element

      def reparentChildren(self, newParent):
          """Move this element's leading text and all children to
          ``newParent``."""
          moved_text = self._element.text
          if newParent.childNodes:
              # The text follows newParent's current last child, so it goes
              # on that child's tail.  Either side may be None; treat None
              # as empty instead of raising TypeError on ``None += str``.
              if moved_text:
                  last = newParent.childNodes[-1]._element
                  last.tail = (last.tail or "") + moved_text
          else:
              if not newParent._element.text:
                  newParent._element.text = ""
              if moved_text is not None:
                  newParent._element.text += moved_text
          self._element.text = ""
          base.Node.reparentChildren(self, newParent)
  class Comment(Element):
      """Wrapper node around an ElementTree comment."""

      def __init__(self, data):
          # Element.__init__ is not invoked here: the wrapped object is an
          # ElementTree comment, and only the bookkeeping attributes below
          # are initialised on the wrapper.
          self._flags = []
          self._childNodes = []
          self.parent = None
          self._element = ElementTree.Comment(data)

      def _getData(self):
          """Return the comment text held by the wrapped element."""
          return self._element.text

      def _setData(self, value):
          """Store ``value`` as the wrapped element's text."""
          self._element.text = value

      # The comment's content is read from and written to the element's text.
      data = property(_getData, _setData)
  class DocumentType(Element):
      """Doctype node, stored as an element tagged ``<!DOCTYPE>``.

      The doctype name is kept in the element's text; the public and system
      identifiers are kept as the ``publicId`` / ``systemId`` attributes.
      """

      def __init__(self, name, publicId, systemId):
          Element.__init__(self, "<!DOCTYPE>")
          self._element.text = name
          # Both assignments go through the property setters below.
          self.publicId = publicId
          self.systemId = systemId

      def _getPublicId(self):
          """Return the stored public identifier, or "" when none is set."""
          return self._element.get("publicId", "")

      def _setPublicId(self, value):
          """Store the public identifier; a None value is ignored."""
          if value is None:
              return
          self._element.set("publicId", value)

      publicId = property(_getPublicId, _setPublicId)

      def _getSystemId(self):
          """Return the stored system identifier, or "" when none is set."""
          return self._element.get("systemId", "")

      def _setSystemId(self, value):
          """Store the system identifier; a None value is ignored."""
          if value is None:
              return
          self._element.set("systemId", value)

      systemId = property(_getSystemId, _setSystemId)
  class Document(Element):
      """Document node, stored as an element tagged ``DOCUMENT_ROOT``."""

      def __init__(self):
          Element.__init__(self, "DOCUMENT_ROOT", namespace=None)
  class DocumentFragment(Element):
      """Fragment node, stored as an element tagged ``DOCUMENT_FRAGMENT``."""

      def __init__(self):
          Element.__init__(self, "DOCUMENT_FRAGMENT", namespace=None)
  158. def testSerializer(element):
  159. rv = []
  160. def serializeElement(element, indent=0):
  161. if not(hasattr(element, "tag")):
  162. element = element.getroot()
  163. if element.tag == "<!DOCTYPE>":
  164. if element.get("publicId") or element.get("systemId"):
  165. publicId = element.get("publicId") or ""
  166. systemId = element.get("systemId") or ""
  167. rv.append("""<!DOCTYPE %s "%s" "%s">""" %
  168. (element.text, publicId, systemId))
  169. else:
  170. rv.append("<!DOCTYPE %s>" % (element.text,))
  171. elif element.tag == "DOCUMENT_ROOT":
  172. rv.append("#document")
  173. if element.text is not None:
  174. rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
  175. if element.tail is not None:
  176. raise TypeError("Document node cannot have tail")
  177. if hasattr(element, "attrib") and len(element.attrib):
  178. raise TypeError("Document node cannot have attributes")
  179. elif element.tag == ElementTreeCommentType:
  180. rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
  181. else:
  182. assert isinstance(element.tag, text_type), \
  183. "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
  184. nsmatch = tag_regexp.match(element.tag)
  185. if nsmatch is None:
  186. name = element.tag
  187. else:
  188. ns, name = nsmatch.groups()
  189. prefix = constants.prefixes[ns]
  190. name = "%s %s" % (prefix, name)
  191. rv.append("|%s<%s>" % (' ' * indent, name))
  192. if hasattr(element, "attrib"):
  193. attributes = []
  194. for name, value in element.attrib.items():
  195. nsmatch = tag_regexp.match(name)
  196. if nsmatch is not None:
  197. ns, name = nsmatch.groups()
  198. prefix = constants.prefixes[ns]
  199. attr_string = "%s %s" % (prefix, name)
  200. else:
  201. attr_string = name
  202. attributes.append((attr_string, value))
  203. for name, value in sorted(attributes):
  204. rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
  205. if element.text:
  206. rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
  207. indent += 2
  208. for child in element:
  209. serializeElement(child, indent)
  210. if element.tail:
  211. rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
  212. serializeElement(element, 0)
  213. return "\n".join(rv)
  def tostring(element):  # pylint:disable=unused-variable
      """Serialize an element and its child nodes to a string

      ``element`` may be an element or an ElementTree object (its root is
      used).  Raises TypeError when a DOCUMENT_ROOT element carries tail
      text or attributes.
      """
      rv = []
      # Named so that it does not shadow the ``filter`` builtin.
      infoset_filter = _ihatexml.InfosetFilter()

      def serializeElement(element):
          if isinstance(element, ElementTree.ElementTree):
              element = element.getroot()

          if element.tag == "<!DOCTYPE>":
              if element.get("publicId") or element.get("systemId"):
                  publicId = element.get("publicId") or ""
                  systemId = element.get("systemId") or ""
                  rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                            (element.text, publicId, systemId))
              else:
                  rv.append("<!DOCTYPE %s>" % (element.text,))
          elif element.tag == "DOCUMENT_ROOT":
              if element.text is not None:
                  rv.append(element.text)
              if element.tail is not None:
                  raise TypeError("Document node cannot have tail")
              if hasattr(element, "attrib") and len(element.attrib):
                  raise TypeError("Document node cannot have attributes")

              for child in element:
                  serializeElement(child)

          elif element.tag == ElementTreeCommentType:
              rv.append("<!--%s-->" % (element.text,))
          else:
              # This is assumed to be an ordinary element.  Convert the tag
              # name once and reuse it for the opening tag (with or without
              # attributes) and the closing tag, so the two always agree.
              # Previously only the attribute-less opening tag was converted.
              tag = infoset_filter.fromXmlName(element.tag)
              if not element.attrib:
                  rv.append("<%s>" % (tag,))
              else:
                  attr = " ".join(["%s=\"%s\"" % (
                      infoset_filter.fromXmlName(name), value)
                      for name, value in element.attrib.items()])
                  rv.append("<%s %s>" % (tag, attr))
              if element.text:
                  rv.append(element.text)

              for child in element:
                  serializeElement(child)

              rv.append("</%s>" % (tag,))

          if element.tail:
              rv.append(element.tail)

      serializeElement(element)

      return "".join(rv)
  class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
      """Tree builder wired to the ElementTree-backed node classes defined
      in the enclosing factory."""

      documentClass = Document
      doctypeClass = DocumentType
      elementClass = Element
      commentClass = Comment
      fragmentClass = DocumentFragment
      implementation = ElementTreeImplementation

      def testSerializer(self, element):
          """Delegate to the enclosing scope's ``testSerializer`` function."""
          return testSerializer(element)

      def getDocument(self):
          """Return the parsed document as an ElementTree element.

          With ``fullTree`` the document's own wrapped element is returned;
          otherwise the result of looking up its ``html`` child, qualified
          with ``defaultNamespace`` when that is not None.
          """
          root = self.document._element
          if fullTree:
              return root
          if self.defaultNamespace is not None:
              return root.find("{%s}html" % self.defaultNamespace)
          return root.find("html")

      def getFragment(self):
          """Return the ElementTree element wrapped by the base class's
          fragment node."""
          fragment = base.TreeBuilder.getFragment(self)
          return fragment._element
  278. return locals()
  # Public entry point derived from getETreeBuilder.  moduleFactoryFactory
  # comes from .._utils and its behavior is not visible here -- presumably it
  # wraps the names returned by the builder into a module; confirm there.
  getETreeModule = moduleFactoryFactory(getETreeBuilder)